home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Aminet 1 (Walnut Creek)
/
Aminet - June 1993 [Walnut Creek].iso
/
usenet
/
sources
/
volume90
/
util
/
bsindex1
/
part01
/
src
/
expression.c
< prev
next >
Wrap
C/C++ Source or Header
|
1990-02-02
|
11KB
|
520 lines
/*
* EXPRESSION.C
*
* This module contains the routines needed to parse and evaluate an
* expression. This is used while selecting which files to print.
*
* This is the BNF for the set of expressions recognised by the
* database searcher. It doesn't contain any semantic information however.
*
* Expression ::= Boolean | Boolean Operator Expression | ALL
* Operator ::= AND | OR
* Boolean ::= NOT Boolean |
* '(' Expression ')' |
* BoolIdent |
* NumIdent Op Value |
* StringIdent Op String |
* DateIdent Op Date
* Op ::= '<' | '>' | '<=' | '>=' | '=' | '<>'
* BoolIdent ::= BINARY | ONLINE | LOCAL | VALID
* StringIdent ::= COMMENT | DISKNAME | NAME | OWNER | PATHNAME
* NumIdent ::= ACCESS | SECTION | DIRECTORY | DISKDIRNUM |
* SIZE | KSIZE
* DateIdent ::= DATE
* String ::= (Text enclosed in quotes)
* Value ::= (A number)
* Date ::= (A date in the format "dd-mon-yy")
*
*/
#ifndef LATTICE_50
#include "system.h"
#endif
#include "bbsindex.h"
#define mystrcmp stricmp /* Make match() insensitive to case */
/*
 * BNF procedures
 *
 * Forward declarations (old-style, without parameter lists) of the
 * recursive-descent parsing routines defined later in this file.
 * Each one is defined taking a single EXPR * argument: the node to
 * fill in.
 */
void bnf_op(), bnf_date(), bnf_expression(), bnf_boolean();
/*
 * Global variable(s)
 *
 * State shared between the tokeniser (readtoken) and the bnf_*
 * parsing routines.
 */
static int treepos; /* Index of the most recently allocated entry in tree[]; newnode() increments it before use and com_select() resets it to 0 */
static int curtoken; /* Current token, as set by readtoken() */
static int curvalue; /* Value of the current token when it is E_NUMBER */
static char curstring[MAXCOM]; /* Text of the current token; for E_STRING, the string without its quotes */
/*
 * Tokens recognised as special in expressions.
 *
 * readtoken() looks a token's text up in this table (after trying the
 * indexes[] table) using an exact, case-sensitive comparison, and sets
 * curtoken to the matching tag.  The table ends with an entry whose
 * name is NULL.
 */
struct {
	int	tag;
	char	*name;
} tokens[] = {
	{ E_EQ,		"="	  },
	{ E_NE,		"<>"	  },
	{ E_LE,		"<="	  },
	{ E_GE,		">="	  },
	{ E_LT,		"<"	  },
	{ E_GT,		">"	  },
	{ E_AND,	"AND"	  },
	{ E_OR,		"OR"	  },
	{ E_NOT,	"NOT"	  },
	{ E_ALL,	"ALL"	  },
	{ E_OPENPAR,	"("	  },
	{ E_CLOSEPAR,	")"	  },
	{ E_TEXT,	"TEXT"	  },
	{ E_REMOTE,	"REMOTE"  },
	{ E_INVALID,	"INVALID" },
	{ E_OFFLINE,	"OFFLINE" },
	/* End marker.  tag is an int, so it gets 0 rather than NULL. */
	{ 0,		NULL	  }
};
/*
 * wild()
 * ------
 * This routine does a case-insensitive wildcard match on the two
 * specified strings.  The first string (s) is the string to check, and
 * the second string (w) contains the wildcard pattern to check against.
 * The special wildcard characters are '?', which matches any single
 * character, and '*', which matches any number of characters
 * (including none).  0 is returned if the two strings match, else a
 * +ve or -ve number.
 *
 * A '*' at the end of the pattern matches an empty remainder, so
 * "a*" matches "a" and "*" matches "".  A run of consecutive '*' is
 * treated as a single '*'.
 */
int wild(s,w)
char *s;
char *w;
{
	char *p;
	int ch;

	while (*w && *s) {
		switch (*w) {
		case '*':
			/* "**" means the same as "*" */
			while (w[1] == '*')
				w++;
			/* ch is the first pattern character after the '*' */
			ch = toupper((unsigned char)w[1]);
			if (!ch)
				return (0);	/* Trailing '*' swallows the rest */
			/*
			 * Try to match the rest of the pattern at every
			 * position where its first character could match.
			 */
			for (p = s; *p; p++) {
				if (ch == '?' || toupper((unsigned char)*p) == ch) {
					if (!wild(p, w+1))
						return (0);
				}
			}
			/* Ran out of string: no match (ch is non-zero here) */
			return (-ch);

		case '?':
			break;

		default:
			if (toupper((unsigned char)*s) != toupper((unsigned char)*w))
				return (toupper((unsigned char)*s) -
						toupper((unsigned char)*w));
		}
		w++;
		s++;
	}
	/*
	 * If the string is used up, any '*' left at the end of the pattern
	 * matches the empty remainder.  (If the string is not used up, the
	 * pattern is, and this loop does nothing.)
	 */
	while (*w == '*')
		w++;
	return (toupper((unsigned char)*s) - toupper((unsigned char)*w));
}
/*
 * Dirty great macro to compare two values according to an operator.
 *
 * e_cmp expands to a switch statement that RETURNS from the enclosing
 * function with the result of the comparison, so it can only be used
 * as the last thing in a branch of a function returning int.  An
 * operator that is not one of the six comparison tokens yields 0
 * rather than dropping out of the switch into whatever follows the
 * macro.
 *
 * e_numcmp, e_strcmp and e_wildcmp expect an EXPR pointer called 'e'
 * to be in scope; they compare against e->num or e->text using e->op.
 */
#define e_cmp(v1,op,v2)					\
	switch (op) {					\
		case E_EQ: return ((v1) == (v2));	\
		case E_NE: return ((v1) != (v2));	\
		case E_LT: return ((v1) <  (v2));	\
		case E_GT: return ((v1) >  (v2));	\
		case E_LE: return ((v1) <= (v2));	\
		case E_GE: return ((v1) >= (v2));	\
		default:   return (0);			\
	}

#define e_numcmp(n)	e_cmp(n, e->op, e->num)
#define e_strcmp(s)	e_cmp(mystrcmp(s,e->text), e->op, 0)
#define e_wildcmp(s)	e_cmp(wild(s,e->text), e->op, 0)
/*
* match()
* -------
* Scans the the parse tree, and returns TRUE if the current
* record matches the criteria in the tree, else FALSE.
*
* Aside: Aren't C macros wonderful? Just think how long this function
* would be if it wasn't for the above e_cmp, e_numcmp and e_strcmp
* macros.
*/
int match(p, e)
UDHEAD *p;
EXPR *e;
{
switch (e->field) {
case E_AND: return (match(p, e->left) && match(p, e->right));
case E_OR: return (match(p, e->left) || match(p, e->right));
case E_NOT: return (!match(p, e->left));
case E_ALL: return (TRUE);
case I_LOCAL: return ((int)p->local);
case I_BINARY: return ((int)p->bin);
case I_VALID: return ((int)p->valid);
case I_ONLINE: return ((int)p->online);
case E_REMOTE: return (!(int)p->local);
case E_TEXT: return (!(int)p->bin);
case E_INVALID: return (!(int)p->valid);
case E_OFFLINE: return (!(int)p->online);
case I_ACCESS: e_numcmp(p->accesses);
case I_DATE: e_numcmp(p->date);
case I_DIRECTORY: e_numcmp(p->dir);
case I_DISKDIRNUM: e_numcmp(p->dirnum);
case I_KSIZE: e_numcmp(BTOK(p->length));
case I_SECTION: e_numcmp(p->section);
case I_SIZE: e_numcmp(p->length);
case I_DISKNAME: e_strcmp(p->disk_name);
case I_COMMENT: e_strcmp(p->desc);
case I_NAME: e_strcmp(p->cat_name);
case I_OWNER: e_strcmp(p->owner);
case W_DISKNAME: e_wildcmp(p->disk_name);
case W_COMMENT: e_wildcmp(p->desc);
case W_NAME: e_wildcmp(p->cat_name);
case W_OWNER: e_wildcmp(p->owner);
}
}
/*
 * newnode()
 * ---------
 * Hands out the next unused element of the tree[] array and returns
 * its address.  The index is advanced before it is used, so element 0
 * (the root filled in by com_select) is never returned.  When the
 * index reaches MAXEXPR an error is reported and Cleanup(10) is
 * called.
 */
EXPR *newnode()
{
	if (++treepos >= MAXEXPR) {
		scripterror("expression to complex\n");
		Cleanup(10);
	}
	return (tree + treepos);
}
/*
* readtoken()
* -----------
* This function reads the next token from the command line, starting
* at position curpos. curtoken is set to the type of token received.
* If it was E_STRING, then curstring points to the string it
* represents (without the quotes). If it was E_NUM, then curvalue
* points to the number represented. If the end of the line is
* reached, E_END is automatically set.
*/
#define nextch() (ch = combuf[compos++])
#define ungetnext() (compos--)
void readtoken()
{
char *p = curstring, ch;
int i;
if (compos >= comlen) {
curtoken = E_END;
return;
}
do {
nextch();
} while (ch == CHAR_SPACE);
if (ch == CHAR_QUOTES) { /* Handle string in quotes */
curtoken = E_STRING;
nextch();
if (*p == CHAR_NULL) {
curtoken = E_END;
return;
}
do {
*p++ = ch;
nextch();
} while (ch && ch != CHAR_QUOTES);
*p = CHAR_NULL;
return;
}
if (isdigit(ch)) { /* Numeric value? */
curtoken = E_NUMBER;
curvalue = 0;
do {
curvalue = curvalue * 10 + (ch - '0');
nextch();
} while (isdigit(ch));
ungetnext();
return;
}
if (isalpha(ch)) { /* Alphabetic identifier? */
do {
*p++ = ch;
nextch();
} while (isalpha(ch));
} else if (ch == CHAR_NULL) { /* At end of line? */
curtoken = E_END;
return;
} else { /* Must be punctuation */
do {
*p++ = ch;
nextch();
} while (ch && !isalpha(ch) && !isdigit(ch) && ch != CHAR_SPACE
&& ch != CHAR_QUOTES);
}
*p = CHAR_NULL;
ungetnext();
/* Check for character A - F */
if (curstring[1] == CHAR_NULL) {
switch (curstring[0]) {
case 'A': case 'B': case 'C':
case 'D': case 'E': case 'F':
curvalue = curstring[0] + 10 - 'A';
curtoken = E_NUMBER;
return;
}
}
/* Now find token that matches string */
for (i = 0; i < MAXINDEX; i++) {
if (!strcmp(curstring, indexes[i].name)) {
curtoken = indexes[i].tag;
return;
}
}
for (i = 0; tokens[i].name; i++) {
if (!strcmp(curstring, tokens[i].name)) {
curtoken = tokens[i].tag;
return;
}
}
scripterror("unrecognised symbol '");
print2(curstring,"' in expression\n");
Cleanup(10);
}
/*
 * com_select()
 * ------------
 * Parses the expression found in the command buffer from position
 * compos onwards and builds the expression tree for it in tree[],
 * with the root in element 0.  match() later evaluates records
 * against this tree.
 */
void com_select()
{
	treepos = 0;			/* Element 0 is the root node */
	readtoken();			/* Prime curtoken for the parser */
	bnf_expression(&tree[0]);
}
/*
 * bnf_expression()
 * ----------------
 * Parses the BNF line:
 *
 *	Expression ::= Boolean | Boolean Operator Expression | ALL
 *
 * and stores the result in *e.  For "Boolean Operator Expression" two
 * new nodes are allocated: the Boolean is copied into the left one and
 * the rest of the expression is parsed into the right one, so a chain
 * of operators groups to the right.
 *
 * NOTE(review): the ALL token is not consumed here, so curtoken is
 * still E_ALL when this routine returns.
 */
void bnf_expression(e)
EXPR *e;
{
	EXPR first;		/* The leading Boolean, before its home is known */

	if (curtoken == E_ALL) {
		e->field = E_ALL;
		return;
	}

	bnf_boolean(&first);

	if (curtoken != E_AND && curtoken != E_OR) {
		/* A lone Boolean: it is the whole expression */
		*e = first;
		return;
	}

	e->field = curtoken;
	e->left  = newnode();
	e->right = newnode();
	*(e->left) = first;
	readtoken();
	bnf_expression(e->right);
}
/*
 * bnf_boolean()
 * -------------
 * Parses the following BNF line into the node *e:
 *
 *	Boolean ::= NOT Boolean			|
 *		    '(' Expression ')'		|
 *		    BoolIdent			|
 *		    NumIdent Op Value		|
 *		    StringIdent Op String	|
 *		    DateIdent Op Date
 *
 * On entry curtoken is the first token of the Boolean; on return it is
 * the first token after it.  Any syntax error is reported, followed by
 * Cleanup(10).
 */
void bnf_boolean(e)
EXPR *e;
{
	char *scan;

	e->field = curtoken;

	switch (curtoken) {

	case E_NOT:
		e->left = newnode();
		readtoken();
		bnf_boolean(e->left);
		break;

	case E_OPENPAR:
		/* The bracketed expression replaces this node entirely */
		readtoken();
		bnf_expression(e);
		if (curtoken != E_CLOSEPAR) {
			scripterror("missing close parenthesis\n");
			Cleanup(10);
		}
		readtoken();
		break;

	/* Flags: the token itself is the whole Boolean */
	case I_LOCAL:
	case I_BINARY:
	case I_VALID:
	case I_ONLINE:
	case E_REMOTE:
	case E_TEXT:
	case E_INVALID:
	case E_OFFLINE:
		readtoken();
		break;

	/* Numeric fields: identifier, operator, number */
	case I_ACCESS:
	case I_DIRECTORY:
	case I_DISKDIRNUM:
	case I_KSIZE:
	case I_SECTION:
	case I_SIZE:
		readtoken();
		bnf_op(e);
		if (curtoken != E_NUMBER) {
			scripterror("number expected in expression\n");
			Cleanup(10);
		}
		e->num = curvalue;
		readtoken();
		break;

	/* String fields: identifier, operator, quoted string */
	case I_COMMENT:
	case I_DISKNAME:
	case I_NAME:
	case I_OWNER:
	case I_PATHNAME:
		readtoken();
		bnf_op(e);
		if (curtoken != E_STRING) {
			scripterror("string expected in expression\n");
			Cleanup(10);
		}
		e->text = mymalloc(strlen(curstring)+1);
		strcpy(e->text, curstring);
		/*
		 * If the string contains a wildcard character ('*' or '?'),
		 * switch the node to the wildcard form of its field as given
		 * by tokentowild().  Strings without wildcards keep the plain
		 * field and are compared with the ordinary string compare.
		 */
		scan = curstring;
		while (*scan && *scan != '*' && *scan != '?')
			scan++;
		if (*scan)
			e->field = tokentowild(e->field);
		readtoken();
		break;

	case I_DATE:
		readtoken();
		bnf_op(e);
		bnf_date(e);
		break;

	default:
		scripterror("error in expression\n");
		Cleanup(10);
	}
}
/*
 * bnf_op
 * ------
 * Expects the current token to be one of the six comparison
 * operators.  If it is, it is stored in the 'op' field of the
 * specified expression and the next token is read; otherwise an error
 * is reported, followed by Cleanup(10).
 */
void bnf_op(e)
EXPR *e;
{
	if (curtoken == E_EQ || curtoken == E_NE ||
	    curtoken == E_LT || curtoken == E_GT ||
	    curtoken == E_LE || curtoken == E_GE) {
		e->op = curtoken;
		readtoken();
		return;
	}
	scripterror("expected comparison operator\n");
	Cleanup(10);
}
/*
* bnf_date()
* -----------
* Reads a date from the command line, validates it, and sets the
* 'num' field in the expression to its numerical value. The date
* is in the format "dd-mon-yy", e.g. "15-Jun-89"
*/
void bnf_date(e)
EXPR *e;
{
int day, month, year;
if (curtoken != E_STRING) {
scripterror("expected date string in expression\n");
Cleanup(10);
}
if (strlen(curstring) != 9 ||
curstring[2] != '-' || curstring[6] != '-') {
scripterror("Invalid date format (should be dd-mon-yy)\n");
Cleanup(10);
}
day = atoi(curstring);
year = atoi(curstring+7);
for (month = 0; month < 12 && strnicmp(curstring+3, months[month], 3);
month++)
;
if ((day < 1 || day > 31) || (month < 1 || month > 12) ||
(year < 1 || year > 99)) {
scripterror("Invalid date format (should be dd-mon-yy)\n");
Cleanup(10);
}
e->num = (((year * 13) + month) << 5) + day;
readtoken();
}